NOTE: The dataset, visualizations, and result outputs in this presentation are not representative of any type of business, user, or review on Yelp.
a. Pulling the data
# Rows of ylpDataSmall that have a non-empty state, grouped by (state, stars).
# filter() also discards rows whose state is NA, because the comparison
# evaluates to NA rather than TRUE.
# tsum is computed before group_by(), so it is the row count of the filtered
# table as a whole (assuming ylpDataSmall is not already grouped -- confirm).
dataGroupByStateStar <- ylpDataSmall %>%
  filter(state != "") %>%
  mutate(tsum = n()) %>%
  group_by(state, stars)
# One summary row per state: row count (total_business), summed review_count
# (total_reviews), and mean of stars rounded to two decimals (avg_rating).
# group_by(state) replaces the (state, stars) grouping carried by
# dataGroupByStateStar, so the aggregates are per state, not per star level.
dataForTableByStateStar <- dataGroupByStateStar %>%
  group_by(state) %>%
  summarise(
    total_business = n(),
    total_reviews = sum(review_count),
    avg_rating = round(mean(stars), 2)
  )
b. Loading the data on the Leaflet map
library(leaflet)
# Circle map of totAvgRatingByState, one circle per row of
# dataTotalAvgStarByState, positioned at (city_lng, city_lat).
# myCol is a palette function defined elsewhere; it colours both the circles
# and the legend.
leaflet(dataTotalAvgStarByState) %>%
  addTiles() %>%
  # Fixed initial centre and zoom level for the map view.
  setView(lng = -96.503906, lat = 38.68551, zoom = 4) %>%
  addCircles(
    lng = ~city_lng,
    lat = ~city_lat,
    weight = 0, # no outline stroke; only the fill is drawn
    # Exponential scaling exaggerates differences between ratings so that
    # the circle sizes are visibly distinct.
    radius = ~exp(totAvgRatingByState * 1.4) * 800,
    fillOpacity = 0.5,
    color = ~myCol(totAvgRatingByState),
    # popup is documented as a character vector of HTML content, so the
    # numeric rating is converted explicitly instead of being passed raw.
    popup = ~as.character(totAvgRatingByState)
  ) %>%
  addLegend(
    "bottomleft",
    pal = myCol,
    values = ~sort(totAvgRatingByState),
    title = "Avg.Ratings",
    labFormat = labelFormat(prefix = ""),
    opacity = 0.5
  )
a. Pulling the data
# Per-state distribution of star levels, plus a hand-tuned weight used to size
# the bubbles on the plot.
# dataGroupByStateStar is grouped by (state, stars); summarise() collapses
# each pair to a count and drops the innermost grouping level, so the sum()
# below runs per state.
dataWeightedGroupByStateStar <- dataGroupByStateStar %>%
  summarise(totalByStar = n()) %>%
  arrange(desc(stars)) %>%
  mutate(
    total = sum(totalByStar),
    percent = round((totalByStar / total) * 100, 1),
    # Custom column to weight the percent for size on the plot. Each nested
    # branch is only reached when the previous threshold failed, so only the
    # lower bound needs testing:
    #   >= 20 -> x2.5, [15, 20) -> x1.2, [10, 15) -> x1, [5, 10) -> x0.8,
    #   below 5 -> constant 1.
    percentWeight = ifelse(
      percent >= 20, percent * 2.5,
      ifelse(
        percent >= 15, percent * 1.2,
        ifelse(
          percent >= 10, percent,
          ifelse(percent >= 5, percent * 0.8, 1)
        )
      )
    )
  )
b. Loading the data on the ggplot bubble plot
library(ggplot2)
# Bubble grid: one bubble per (state, stars) row, sized by percentWeight,
# coloured by stars, and labelled with the percent value.
ggplot(
  dataWeightedGroupByStateStar,
  aes(x = state, y = stars, label = percent)
) +
  # NOTE(review): alpha = 0.05 sits inside aes(), so it is a mapped aesthetic
  # (with its own legend entry) rather than a fixed transparency -- confirm
  # that this is intended.
  geom_point(aes(size = percentWeight * 2, colour = stars, alpha = 0.05)) +
  geom_text(hjust = 0.4, size = 4) +
  # The size legend is suppressed; the printed percent carries that information.
  scale_size(range = c(1, 30), guide = "none") +
  scale_color_gradient(low = "darkblue", high = "red") +
  scale_y_continuous(breaks = seq(1, 5, by = 0.5)) +
  labs(
    title = "A grid of detailed avg.ratings by state ",
    x = "State",
    y = "Detailed Avg.Ratings"
  ) +
  theme(legend.title = element_blank())
—
a. Pulling the data
# Split ylpUserSm3 on the elite column. The literal string "[]" presumably
# encodes an empty elite list (verify against the data loader).
# Rows where elite is NA end up in neither subset, since filter() drops them.
ylpUserSmElite <- filter(ylpUserSm3, elite != "[]")
ylpUserSmNormal <- filter(ylpUserSm3, elite == "[]")
b. Loading the data on the box plot
library(ggthemes)
# Boxplots of review_count against fans, with one box per Fan_Size group.
# Built with ggplot() + geom_boxplot() because qplot() is deprecated as of
# ggplot2 3.4.0; the aesthetics, axis labels and theme match the qplot() calls
# they replace. The legend is hidden because the colour only repeats the
# grouping.

# Yelp users in the boxplot
ggplot(
  ylpUserSm3,
  aes(x = fans, y = review_count, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Total review counts by the number of fans") +
  theme(legend.position = "none")

# Elite Yelp group users in the boxplot
ggplot(
  ylpUserSmElite,
  aes(x = fans, y = review_count, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Total review counts by the number of fans (Elite users)") +
  theme(legend.position = "none")

# Non-elite Yelp group users in the boxplot
ggplot(
  ylpUserSmNormal,
  aes(x = fans, y = review_count, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Total review counts by the number of fans (Non-elite Users)") +
  theme(legend.position = "none")
c. Loading the data on the combination plots (point+smooth)
# Scatter plots of review_count against fans with smoothed trend lines,
# points coloured by fans on a darkblue -> darkred gradient.
# Built with ggplot() because qplot() is deprecated as of ggplot2 3.4.0;
# geom_point() + geom_smooth() are the layers that
# qplot(geom = c("point", "smooth")) produced.
# NOTE(review): each plot carries two smoothers -- the default geom_smooth()
# and the styled stat_smooth() drawn on top of it. Both are kept so the
# rendering is unchanged; the first looks redundant and could probably go.
# NOTE(review): the all-users plot reads ylpUserSm1 while the boxplots read
# ylpUserSm3 -- confirm that this is intended.

# Yelp users in combination plots
ggplot(ylpUserSm1, aes(x = fans, y = review_count, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Total review counts by the number of fans") +
  scale_color_gradient(low = "darkblue", high = "darkred") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)

# Elite Yelp group users in combination plots
ggplot(ylpUserSmElite, aes(x = fans, y = review_count, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Total review counts by the number of fans (Elite users)") +
  scale_color_gradient(low = "darkblue", high = "darkred") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)

# Non-elite Yelp group users in combination plots
ggplot(ylpUserSmNormal, aes(x = fans, y = review_count, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Total review counts by the number of fans (Non-elite users)") +
  scale_color_gradient(low = "darkblue", high = "darkred") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)
a. Loading the data on the box plot
# Boxplots of average_stars against fans, with one box per Fan_Size group.
# Built with ggplot() + geom_boxplot() because qplot() is deprecated as of
# ggplot2 3.4.0; the aesthetics, axis labels and theme match the qplot() calls
# they replace. The legend is hidden because the colour only repeats the
# grouping.

# Yelp users in the boxplot
ggplot(
  ylpUserSm3,
  aes(x = fans, y = average_stars, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Average ratings by the number of fans") +
  theme(legend.position = "none")

# Elite Yelp group users in the boxplot
ggplot(
  ylpUserSmElite,
  aes(x = fans, y = average_stars, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Average ratings by the number of fans (Elite users)") +
  theme(legend.position = "none")

# Non-elite Yelp group users in the boxplot
ggplot(
  ylpUserSmNormal,
  aes(x = fans, y = average_stars, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Average ratings by the number of fans (Non-elite users)") +
  theme(legend.position = "none")
b. Loading the data on the combination plots (point+smooth)
# Scatter plots of average_stars against fans with smoothed trend lines,
# points coloured by fans.
# Built with ggplot() because qplot() is deprecated as of ggplot2 3.4.0;
# geom_point() + geom_smooth() are the layers that
# qplot(geom = c("point", "smooth")) produced.
# NOTE(review): each plot carries two smoothers -- the default geom_smooth()
# and the styled stat_smooth() drawn on top of it. Both are kept so the
# rendering is unchanged; the first looks redundant and could probably go.
# NOTE(review): the all-users plot uses high = "darkred" while the elite and
# non-elite plots use high = "red" -- left as found; confirm which is wanted.

# Yelp users in combination plots
ggplot(ylpUserSm1, aes(x = fans, y = average_stars, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Average ratings by the number of fans") +
  scale_color_gradient(low = "darkblue", high = "darkred") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)

# Elite Yelp group users in combination plots
ggplot(ylpUserSmElite, aes(x = fans, y = average_stars, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Average ratings by the number of fans (Elite users)") +
  scale_color_gradient(low = "darkblue", high = "red") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)

# Non-elite Yelp group users in combination plots
ggplot(ylpUserSmNormal, aes(x = fans, y = average_stars, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Average ratings by the number of fans (Non-elite users)") +
  scale_color_gradient(low = "darkblue", high = "red") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)